{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Author: Antti Kiviaho\n",
    "# Date: 20.1.2023\n",
    "# A script for running normalization and sample integration clustering.\n",
    "# Uses the scib integration environment and pipeline:\n",
    "#\n",
    "#\n",
    "# 1. Cell and gene filtering\n",
    "# 2. scran normalization through R interface using scib.preprocessing.normalize\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir('/lustre/scratch/kiviaho/prostate_spatial/')\n",
    "import numpy as np\n",
    "import anndata as ad\n",
    "import scanpy as sc\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import scib\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Added spot exclusion information 28.2.2024\n",
    "from scripts.utils import get_sample_ids_reorder, save_to_pickle, get_include_exclude_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def qc_and_normalize(adata, min_cells=5, min_counts=500):\n",
    "    \"\"\"Filter genes and spots of one sample, then normalize it with scib.\n",
    "\n",
    "    Steps, in this order:\n",
    "      1. `sc.pp.filter_genes` with `min_cells`\n",
    "      2. `sc.pp.filter_cells` with `min_counts`\n",
    "      3. `scib.preprocessing.normalize` with `precluster=False`\n",
    "\n",
    "    Both scanpy filters are called in their default in-place mode, so the\n",
    "    object passed in is modified and the very same object is returned.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    adata\n",
    "        AnnData object of a single sample (here: Visium spots x genes).\n",
    "    min_cells : int, default 5\n",
    "        Forwarded to `sc.pp.filter_genes`: minimum number of observations\n",
    "        a gene must be expressed in to be kept.\n",
    "    min_counts : int, default 500\n",
    "        Forwarded to `sc.pp.filter_cells`: minimum total counts an\n",
    "        observation must have to be kept.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The filtered and normalized `adata` (same object as the input).\n",
    "    \"\"\"\n",
    "    # Gene filter runs first, so the per-spot count threshold below is\n",
    "    # evaluated on the already gene-filtered matrix (order kept as-is).\n",
    "    sc.pp.filter_genes(adata, min_cells=min_cells)\n",
    "    sc.pp.filter_cells(adata, min_counts=min_counts)\n",
    "    # Normalization step of the scib pipeline, run without pre-clustering\n",
    "    scib.preprocessing.normalize(adata,precluster=False)\n",
    "    return adata\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This non-pathology-filtered dictionary was used for cell2location,\n",
    "# as the annotation wasn't available at the time of mapping.\n",
    "\n",
    "samples = get_sample_ids_reorder(['BPH','untreated','bicalutamide','goserelin','CRPC'])\n",
    "samples_dict = {} # Maps sample id -> QC-filtered, normalized AnnData\n",
    "for sample_id in samples:\n",
    "    adata_sample = sc.read_visium('./results/'+sample_id+'/outs/',library_id=sample_id)\n",
    "    adata_sample.var_names_make_unique()\n",
    "\n",
    "    # Prefix the spot ids with the sample id for use after data concatenation\n",
    "    adata_sample.obs_names = sample_id + '_' + adata_sample.obs_names\n",
    "\n",
    "    # QC and normalize - this filters out more spots.\n",
    "    # The object was just read from disk and is referenced nowhere else,\n",
    "    # so it is stored directly instead of through a second deep copy.\n",
    "    samples_dict[sample_id] = qc_and_normalize(adata_sample)\n",
    "\n",
    "    print(sample_id + ' done')\n",
    "    del adata_sample\n",
    "\n",
    "save_to_pickle(samples_dict,'./data/normalized_no_pathology_filter_visium_data.pkl')\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Revised on 28.2.2024:\n",
    "# Remove regions with exclude/include annotation (exclude_info)\n",
    "# Save each sample separately for better memory management\n",
    "\n",
    "samples = get_sample_ids_reorder()\n",
    "exclude_info = get_include_exclude_info()\n",
    "\n",
    "# Make sure the output folder exists before the first sample is written\n",
    "os.makedirs('data/normalized_visium', exist_ok=True)\n",
    "\n",
    "obs_data_list = [] # Per-sample obs tables of the spots that passed all filters\n",
    "for sample_id in samples:\n",
    "\n",
    "    adata_sample = sc.read_visium('./results/'+sample_id+'/outs/',library_id=sample_id)\n",
    "    adata_sample.var_names_make_unique()\n",
    "\n",
    "    # add ids to the data for use after data concatenation\n",
    "    adata_sample.obs_names = sample_id + '_' + adata_sample.obs_names\n",
    "\n",
    "    # Only use this with Tampere cohort samples\n",
    "    # Subset spots from a single sample\n",
    "    # NOTE: .loc raises a KeyError if any spot id of this sample is missing from exclude_info\n",
    "    sample_exclude_info = exclude_info.loc[adata_sample.obs_names].copy()\n",
    "    sample_spots_to_keep = sample_exclude_info[~sample_exclude_info['Pathology'].isin(['Exclude','Lumen'])].index\n",
    "\n",
    "    # Subset the sample with spots to keep.\n",
    "    # Indexing returns a view; copy it so that the in-place filters in\n",
    "    # qc_and_normalize work on a real object instead of a view.\n",
    "    adata_sample = adata_sample[sample_spots_to_keep].copy()\n",
    "\n",
    "    # QC and normalize - this filters out more spots\n",
    "    adata_sample = qc_and_normalize(adata_sample)\n",
    "\n",
    "    # Save the object\n",
    "    adata_sample.write_h5ad('data/normalized_visium/'+sample_id+'_normalized.h5ad')\n",
    "\n",
    "    # Save the obs data to a list\n",
    "    obs_data_list.append(adata_sample.obs)\n",
    "\n",
    "    print(sample_id + ' done')\n",
    "    del adata_sample\n",
    "\n",
    "# Save the ids of all spots that passed the qc (110681)\n",
    "pd.DataFrame(index=pd.concat(obs_data_list).index).to_csv('./data/post_qc_and_pathology_annot_valid_spots.csv')\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
